import re

# Basic re.findall demonstrations on small sample strings.
s1 = "hello,123,okokoko123 hello ;"

# Raw strings keep the backslash in "\d" from being treated as a (invalid)
# string escape sequence, which newer Python versions warn about.
r1 = re.findall(r"\d", s1)     # every single digit: ['1', '2', '3', '1', '2', '3']
r2 = re.findall(r"\d+", s1)    # every run of digits: ['123', '123']
r3 = re.findall(r"hello", s1)  # every literal "hello": ['hello', 'hello']

s2 = "hellookoko12121 start 123 kkk lllk hello 00 endk;llkjdfdfd"
# Find the content between "start" and "end"; the non-greedy ".*?" stops at
# the first "end" that follows "start".
r4 = re.findall(r"start.*?end", s2)  # ['start 123 kkk lllk hello 00 end']

# Process the data returned by Baidu Tieba: use a regex to extract the part
# of the page that can be parsed with xpath.
# Start marker: <ul id="thread_list"
# End marker:   <div class="thread_list_bottom clearfix">
start = '<ul id="thread_list"'
end = '<div class="thread_list_bottom clearfix">'

# The context manager closes the file even if reading fails.
with open("temp1", "r", encoding="utf-8") as f:
    s3 = f.read()

# The markers are literal text, so escape them before embedding them in the
# pattern. re.DOTALL lets "." match newlines, since the fragment spans lines.
r5 = re.findall(re.escape(start) + ".*?" + re.escape(end), s3, re.DOTALL)
if not r5:
    # Fail with a readable message instead of a bare IndexError on r5[0].
    raise SystemExit("temp1: no thread_list fragment found between the start and end markers")
print(r5[0])